/*
 * CReadsSet.cpp
 *
 *  Created on: Jan 18, 2009
 *      Author: yanghoch
 * This class reads and stores fixed-length DNA short reads.
 * Reads are first loaded from an input file (fasta, quality score or txt format);
 * the format is chosen according to the extension of the input file name.
 */

#include "ReadInBitsSet.h"

// Fallback definition: only supplied when no earlier header has already
// provided MAX_PATH as a preprocessor macro.
#ifndef MAX_PATH
const int MAX_PATH = 2048;
#endif

// Default constructor: leaves the set empty by resetting every member through
// initialization(); no storage is allocated here.
CReadInBitsSet::CReadInBitsSet()
{
    initialization();
}

// Destructor: releases the four heap objects this set holds pointers to.
// Pointers that are still NULL are harmless, since deleting a null pointer
// does nothing.
CReadInBitsSet::~CReadInBitsSet()
{
    delete pReadsSet;        // encoded reads
    delete pReadsID;         // read identifiers
    delete pMismatchScores;  // mismatch scores
    delete pQualScores;      // quality scores
}

// Constructs an empty in-memory read set (no input file is opened).
//
//   capacity             number of reads/ids to reserve room for up front
//   uiReadLength         length of every read; also stored in the static
//                        CReadInBits::iReadLength
//   allowedNumOfNinRead  how many 'N' bases a read may contain; 0 means reads
//                        containing N are discarded
//
// The allocation check now runs BEFORE the vectors are used: previously
// reserve() was called through both pointers ahead of the NULL test, so the
// test could never protect those calls.
CReadInBitsSet::CReadInBitsSet(unsigned int capacity, unsigned int uiReadLength, unsigned int allowedNumOfNinRead)
{
    this->initialization();
    this->uiRead_Length = uiReadLength;
    this->allowedNumOfNinRead = allowedNumOfNinRead;
    this->bDiscardReadWithN = (allowedNumOfNinRead == 0);
    CReadInBits::iReadLength = static_cast<int>(uiReadLength); // Set the Kmer has length

    this->pReadsSet = new vector<CReadInBits>();
    this->pReadsID = new vector<CReadID>();
    if (this->pReadsSet == NULL || this->pReadsID == NULL) {
        ERR; // Fail to new space storing read
    } else {
        // Only touch the containers once we know both exist.
        this->pReadsSet->reserve(capacity);
        this->pReadsID->reserve(capacity);
    }
}

/*
 * File-backed constructor.
 * Resets all members, then forwards every argument unchanged to init(),
 * which records the settings and prepares the input file for reading.
 */
CReadInBitsSet::CReadInBitsSet(const char* InputFile, const char* fileFormat, unsigned int uiReadLength,
                               unsigned int allowedNumOfNinRead, unsigned int uiReadStartIndex)
{
    initialization();
    init(InputFile, fileFormat, uiReadLength, allowedNumOfNinRead, uiReadStartIndex);
}

// Puts every member into a well-defined "empty" state. Called first by all
// constructors in this file.
//
// In addition to the counters and pointers, this now also gives
// allowedNumOfNinRead and cFileType a definite value. They used to be left
// indeterminate, yet cFileType is read later (e.g. by printMissReads and
// getQualityScoresFromQUAL) even on objects that never opened a file.
void CReadInBitsSet::initialization(void)
{
    this->InputFile[0] = '\0';
    this->uiRead_Length = 0;
    this->uiNo_of_Reads = 0;
    this->uiNo_of_Bad_Reads = 0; // Counting the reads with low quality score
    this->pReadsSet = NULL;
    this->pReadsID = NULL;
    this->pMismatchScores = NULL;
    this->pQualScores = NULL;
    this->bDiscardReadWithN = true;
    // Consistent with bDiscardReadWithN == true, which the other constructors
    // derive from (allowedNumOfNinRead == 0).
    this->allowedNumOfNinRead = 0;
    // 'N' is the value openAFileReady2GetRead() treats as "nothing to read".
    this->cFileType = 'N';
}

// Records the read settings, allocates fresh read/id containers and, when the
// input file exists, prepares it for reading.
//
//   InputFile            path of the reads file (copied into this->InputFile)
//   fileFormat           format hint forwarded to openAFileReady2GetRead()
//   uiReadLength         length of every read
//   allowedNumOfNinRead  how many 'N' bases a read may contain; 0 means reads
//                        containing N are discarded
//   uiReadStartIndex     forwarded to openAFileReady2GetRead()
//
// Containers held from an earlier init()/construction are released before new
// ones are allocated; previously the old pointers were simply overwritten,
// which leaked them.
// NOTE(review): the write to parser.caNextRead[uiReadLength] assumes that
// buffer holds at least uiReadLength + 1 chars - confirm against the parser.
void CReadInBitsSet::init(const char* InputFile, const char* fileFormat, unsigned int uiReadLength,
                          unsigned int allowedNumOfNinRead, unsigned int uiReadStartIndex)
{
    myStrCpy(this->InputFile, InputFile, FILENAME_MAX);
    this->uiRead_Length = uiReadLength;
    this->parser.caNextRead[uiReadLength] = '\0';
    this->allowedNumOfNinRead = allowedNumOfNinRead;
    this->bDiscardReadWithN = (allowedNumOfNinRead == 0);

    // Drop whatever was held before; deleting a null pointer is a no-op.
    // The pointers are cleared first so the object stays destructible if an
    // allocation below throws.
    delete this->pReadsSet;
    this->pReadsSet = NULL;
    delete this->pReadsID;
    this->pReadsID = NULL;

    this->pReadsSet = new vector<CReadInBits>();
    this->pReadsID = new vector<CReadID>();
    if (this->pReadsSet == NULL || this->pReadsID == NULL) {
        ERR; // Fail to new space storing read
    }
    if (fileExist(InputFile)) {
        this->openAFileReady2GetRead(InputFile, fileFormat, uiReadStartIndex);
    } else {
        LOG_INFO("Info %d: File %s is not available!\n", WARNING_LOG, InputFile);
    }
}

// clear and reserve the capacity for read, readId and quality
int CReadInBitsSet::clearReads(int capacity)
{
    if (this->pReadsSet != NULL) {
        this->pReadsSet->clear();
        this->pReadsSet->reserve(capacity);
        if (this->pReadsID != NULL) {
            this->pReadsID->clear();
            this->pReadsID->reserve(capacity);
        }
        if (this->pQualScores != NULL) {
            this->pQualScores->clear();
            this->pQualScores->reserve(capacity);
        }
        return(0);
    }
    return(1);
}

bool CReadInBitsSet::handleBadRead(void)
{
    int isBadRead = 0;
    while (this->pReadsID->size() > this->pReadsSet->size()) {
        this->pReadsID->pop_back();
        isBadRead++;
    }
    this->uiNo_of_Bad_Reads += isBadRead; // Check if uiNo_of_Bad_Reads is correct
    return(isBadRead > 0);
}

// Hands the given output buffer to the parser by storing it in parser.pOBuf.
void CReadInBitsSet::setBadReadOutputFile(FileOutputBuffer* pOut)
{
    parser.pOBuf = pOut;
}

// Opens the input file through the parser and prepares the containers for
// buffered reading. No read is loaded here.
//
//   InputFile         path of the reads file
//   fileFormat        format hint forwarded to the parser
//   uiReadStartIndex  forwarded to the parser
//
// Returns 0 when the parser reports file type 'N', otherwise
// BUFFERED_READS_SIZE (the capacity reserved for each batch).
//
// Any quality-score object held from an earlier call is released before a new
// one is allocated, matching what openAFileReady2GetReadQSinQUAL() does;
// previously the old object was overwritten and leaked.
unsigned int CReadInBitsSet::openAFileReady2GetRead(const char* InputFile, const char* fileFormat,
        unsigned int uiReadStartIndex)
{
    // TODO: consider to pass bad read file output buffer to openAFileReady2GetRead.
    this->cFileType = this->parser.openAFileReady2GetRead(
                          InputFile, fileFormat, uiReadStartIndex, this->uiRead_Length, this->bDiscardReadWithN);
    // Queried before the 'N' check, exactly as before, to keep the call order.
    const bool wantQScores = this->bGetQScores();
    if (this->cFileType == 'N') {
        return(0);
    }
    if (wantQScores) {
        delete this->pQualScores;   // no-op when nothing was allocated yet
        this->pQualScores = NULL;   // stay destructible if the allocation throws
        this->pQualScores = new CReadsQualScores(this->uiRead_Length, BUFFERED_READS_SIZE);
    } else if (this->cFileType == 'S') {
        // check if the quality score with the same basename exist or not
        this->openAFileReady2GetReadQSinQUAL(InputFile, this->uiRead_Length);
    }
    this->clearReads(BUFFERED_READS_SIZE);
    return(BUFFERED_READS_SIZE);
}

unsigned int CReadInBitsSet::get_next_capacity_reads(int capacity, char sep)
{
    if (this->uiNo_of_Reads > 0) {
        printf("Deal read no. %u in %s.\r", this->uiNo_of_Reads, this->InputFile);
    }
    fflush(stdout);
    bool bGetQScoresInFastq = (this->pQualScores != NULL) && this->bGetQScores();
    this->clearReads(capacity);

    do {
        parser.get_Next_Read(); // the next read are in this->parser.caNextRead
        bool isABadRead = isBadRead(this->isSOLiDReadFormat(), this->parser.caNextRead, this->parser.uiRead_Length);
        if (this->parser.caNextRead[0] == '\0') {
            this->parser.pBuf->fflush();
            break; // End of the file
        } else if (!isABadRead && this->save_next_read(this->parser.caNextRead, this->isSOLiDReadFormat())) {
            this->save_next_read_id(this->parser.caNextReadTag, sep);
            if (bGetQScoresInFastq) {
                this->pQualScores->addQSs(this->parser.caNextReadQSs);
            }
        } else {
            this->parser.print_Next_Read();
            this->handleBadRead();
        }
    } while (this->size() < this->capacity());
    this->removeExtraTags();
    this->getQualityScoresFromQUAL();
    return((unsigned int)this->size());
}

void CReadInBitsSet::ignoreQScores(void)
{
    if (this->pQualScores != NULL) {
        delete this->pQualScores;
        this->pQualScores = NULL;
    }
}

// Writes a placeholder id of the form "fakeTag<readIdNo>" into readIdBuf and
// reports the missing tag on stdout.
// readIdBuf must be large enough for the text plus its terminator.
inline void makeFakeReadId(char* readIdBuf, unsigned int readIdNo)
{
    // Tell the user which read lacks a tag (carriage return keeps one status line).
    printf("\r%uth read has no tag.", readIdNo);

    // Build the substitute id.
    sprintf(readIdBuf, "fakeTag%u", readIdNo);
}

// Extracts a read id from a tag line into readIdStr.
//
//   readIdStr  output buffer; at most READ_ID_LENGTH - 1 chars are copied and
//              the result is always terminated
//   tagLine    the raw tag line; its first character is always skipped
//              (presumably the record marker - confirm against the parser)
//   readIdNo   number used for the placeholder id when the line has no id
//   sep        separator character; leading separators are trimmed like
//              whitespace and it is forwarded to formatReadId()
//
// When nothing is left after trimming, makeFakeReadId() supplies a
// placeholder.
//
// Fixes: a NULL or empty tagLine is now handled up front (the scan used to
// start at index 1 and so read past the terminator of an empty string), and
// isspace() is given an unsigned char value because passing a negative char
// is undefined.
void getReadIdFromTagLine(char* readIdStr, const char* tagLine, unsigned int readIdNo, char sep = ',')
{
    if (tagLine == NULL || tagLine[0] == '\0') {
        makeFakeReadId(readIdStr, readIdNo);
        return;
    }

    // Skip the first character, then any leading whitespace / separators.
    int trimStart = 1;
    while (tagLine[trimStart] != '\0' &&
            (isspace(static_cast<unsigned char>(tagLine[trimStart])) || tagLine[trimStart] == sep)) {
        trimStart++;
    }

    if (tagLine[trimStart] == '\0') {
        makeFakeReadId(readIdStr, readIdNo);
    } else {
        strncpy(readIdStr, &tagLine[trimStart], READ_ID_LENGTH - 1);
        readIdStr[READ_ID_LENGTH - 1] = '\0'; // strncpy does not terminate on truncation
        formatReadId(readIdStr, sep);
    }
}

void CReadInBitsSet::get_read_id(int no, char* readId)
{
    int idSize = (int)this->pReadsID->size();
    if (idSize > no) {
        strcpy(readId, this->pReadsID->at(no).id);
    } else {
        sprintf(readId, "Fake_Read_%d", no + 1);
    }
}

// Returns true when the read contains at most `threshold` ambiguous bases
// ('N' or '.'); returns false as soon as the count exceeds the threshold.
inline bool isReadHasNlessThan(const char* read, int threshold)
{
    int ambiguousSeen = 0;
    for (const char* cursor = read; *cursor != '\0'; ++cursor) {
        const bool isAmbiguous = (*cursor == 'N') || (*cursor == '.');
        if (isAmbiguous && ++ambiguousSeen > threshold) {
            return(false);
        }
    }
    return(true);
}

// Convenience overload: stores the read currently held in parser.caNextRead
// by delegating to save_next_read(const char*, bool).
// Returns that call's result.
bool CReadInBitsSet::save_next_read(bool bSOLiDReadFormat, bool bGetQScores)
{
    // TODO: bGetQScores is not used !!
    const char* pendingRead = this->parser.caNextRead;
    return(this->save_next_read(pendingRead, bSOLiDReadFormat));
}

// Encodes readSeq and, on success, appends it to the read container and
// increments uiNo_of_Reads.
//
// The encoder depends on two switches:
//   - bDiscardReadWithN set:   encodeColors (SOLiD) or CReadInBits::encode
//   - bDiscardReadWithN clear: the read must first pass the
//     allowedNumOfNinRead limit, then encodeColorsNas3 (SOLiD) or
//     CReadInBits::encodeRead_NasA is used
// Returns true when the read was stored, false otherwise.
bool CReadInBitsSet::save_next_read(const char* readSeq, bool bSOLiDReadFormat)
{
    CReadInBits r;
    bool encodedOk = true;

    if (this->bDiscardReadWithN) {
        if (bSOLiDReadFormat) {
            encodedOk = encodeColors(readSeq, r);
        } else {
            encodedOk = (r.encode(readSeq, this->uiRead_Length) == 0);
        }
    } else {
        // Reads carrying too many N's are rejected before any encoding.
        if (!isReadHasNlessThan(readSeq, this->allowedNumOfNinRead)) {
            return(false);
        }
        if (bSOLiDReadFormat) {
            encodedOk = encodeColorsNas3(readSeq, r);
        } else {
            encodedOk = (r.encodeRead_NasA(readSeq, this->uiRead_Length) == 0);
        }
    }

    if (!encodedOk) {
        return(false);
    }
    this->uiNo_of_Reads++;
    this->pReadsSet->push_back(r);
    return(true);
}

// Encodes a color string and, on success, appends it to the read container
// and increments uiNo_of_Reads.
// With bDiscardReadWithN set, encodePureColors is used; otherwise the string
// must pass the allowedNumOfNinRead limit and encodePureColorsNas3 is used.
// Returns true when the read was stored, false otherwise.
bool CReadInBitsSet::save_next_colorStr_as_a_read(const char* readSeq)
{
    CReadInBits r;
    bool encodedOk = false;

    if (this->bDiscardReadWithN) {
        encodedOk = encodePureColors(readSeq, r);
    } else if (isReadHasNlessThan(readSeq, this->allowedNumOfNinRead)) {
        encodedOk = encodePureColorsNas3(readSeq, r);
    }

    if (!encodedOk) {
        return(false);
    }
    this->uiNo_of_Reads++;
    this->pReadsSet->push_back(r);
    return(true);
}

// Convenience overload: stores the id found in the parser's current tag line
// (parser.caNextReadTag) using the given separator.
void CReadInBitsSet::save_next_read_id(char sep)
{
    const char* currentTagLine = this->parser.caNextReadTag;
    this->save_next_read_id(currentTagLine, sep);
}

// Stores the id taken from tagLine so that the id container lines up with the
// read container. Three situations are distinguished by comparing the number
// of ids with the number of reads:
//   - exactly one id is missing: the new id is appended;
//   - there are already at least as many ids as reads: the last id is
//     replaced by the new one (the read got more than one tag);
//   - more than one id is missing: placeholder ids fill the gap, then the new
//     id is appended.
//
// Fix: when the id container is empty, the "more than one tag" case no longer
// calls pop_back() on an empty vector (undefined behaviour) nor prints
// readIdNo - 1 after unsigned wrap-around; the new id is simply stored.
void CReadInBitsSet::save_next_read_id(const char* tagLine, char sep)
{
    CReadID tag; // Save the read ID in the vector
    const unsigned int readIdNo = (unsigned int)this->pReadsID->size();

    if ((readIdNo + 1) == this->pReadsSet->size()) {
        getReadIdFromTagLine(tag.id, tagLine, readIdNo, sep);
        this->pReadsID->push_back(tag);
    } else if (readIdNo >= this->pReadsSet->size()) {
        if (readIdNo > 0) {
            // There is a previous id to report and to replace.
            printf("\r%uth read has more than one tag.", readIdNo - 1);
            this->pReadsID->pop_back();
        }
        getReadIdFromTagLine(tag.id, tagLine, readIdNo, sep);
        this->pReadsID->push_back(tag);
    } else { // Make fake read Id tags
        for (int readID = readIdNo + 1; readID < (int)this->pReadsSet->size(); readID++) {
            makeFakeReadId(tag.id, readID);
            this->pReadsID->push_back(tag);
        }
        getReadIdFromTagLine(tag.id, tagLine, readIdNo, sep);
        this->pReadsID->push_back(tag);
    }
}

// Writes every read whose mismatch score is at least missMatchScoreT to
// `outputfile`, one "<index>,<sequence>" line per read. Reads are decoded with
// decodeColors() when the set's file type is 'S', otherwise with
// CReadInBits::decode().
// Returns the number of reads written, or -1 (after logging) when the set has
// no reads or no mismatch scores.
int printMissReads(const char* outputfile, CReadInBitsSet& readsSet, int missMatchScoreT)
{
    const bool hasMappedReads = (readsSet.uiNo_of_Reads > 0) && (readsSet.pMismatchScores != NULL);
    if (!hasMappedReads) {
        LOG_INFO("Info %d: Reads set is empty or haven't been mapped!\n", INFO_LOG);
        return(-1);
    }

    ofstream ofile(outputfile);
    int missReadsNo = 0;
    for (unsigned int readIdx = 0; readIdx < readsSet.pReadsSet->size(); readIdx++) {
        // Skip reads scoring below the threshold.
        if ((int)readsSet.pMismatchScores->mismatchScore[readIdx] < missMatchScoreT) {
            continue;
        }
        char caRead[wordSize + 1];
        if (readsSet.cFileType == 'S') { // csfasta format for solid read
            decodeColors(caRead, readsSet.pReadsSet->at(readIdx));
        } else {
            readsSet.pReadsSet->at(readIdx).decode(caRead);
        }
        ofile << readIdx << ',' << caRead << endl;
        missReadsNo++;
    }
    ofile.close();
    return(missReadsNo);
}

// Looks for a quality-score file that accompanies InputFile and, when one is
// found, replaces the current quality-score object with one reading from it.
//
// Candidate names are produced by chExtName() in this order: ".QUAL",
// ".qual", "_QV.QUAL", and finally ".qual" applied to the "_QV.QUAL" name.
//
//   InputFile     path of the reads file the name is derived from
//   readQsLength  per-read score length passed to CReadsQualScores
//
// Returns BUFFERED_READS_SIZE when a quality file was found, 0 otherwise.
//
// Fix: the path is now copied with a bound and explicitly terminated; the
// unbounded strcpy() could overrun qualFile for paths of MAX_LINE chars or
// more. The redundant NULL re-check after resetting pQualScores is gone.
// NOTE(review): chExtName() may still lengthen the name - confirm it respects
// the buffer size.
unsigned int CReadInBitsSet::openAFileReady2GetReadQSinQUAL(const char* InputFile, unsigned int readQsLength)
{
    char qualFile[MAX_LINE];
    strncpy(qualFile, InputFile, MAX_LINE - 1);
    qualFile[MAX_LINE - 1] = '\0'; // strncpy does not terminate on truncation

    chExtName(qualFile, ".QUAL");
    if (!fileExist(qualFile)) {
        chExtName(qualFile, ".qual");
        if (!fileExist(qualFile)) {
            chExtName(qualFile, "_QV.QUAL");
            if (!fileExist(qualFile)) {
                chExtName(qualFile, ".qual");
            }
        }
    }

    if (fileExist(qualFile)) {
        LOG_INFO("Info %d: Get quality scores from %s!\n", INFO_LOG, qualFile);
        delete this->pQualScores;   // no-op when nothing was allocated yet
        this->pQualScores = NULL;   // stay destructible if the allocation throws
        this->pQualScores = new CReadsQualScores(readQsLength, BUFFERED_READS_SIZE);
        this->pQualScores->openQUALfile(qualFile);
        return(BUFFERED_READS_SIZE);
    } else {
        LOG_INFO("Info %d: Quality score file %s is not available!\n", INFO_LOG, qualFile);
    }
    return(0);
}

void CReadInBitsSet::removeExtraTags(void)
{
    for (unsigned int i = (unsigned int)(this->pReadsID->size()); i > this->uiNo_of_Reads; i--) {
        this->pReadsID->pop_back(); // remove extra tags
    }
}

void CReadInBitsSet::getQualityScoresFromQUAL(void)
{
    if (this->pQualScores != NULL && this->cFileType == 'S') {
        this->pQualScores->getQualityScoresFromQUAL(this->pReadsID);
    }
}

const char* CReadInBitsSet::get_Next_Read(void)
{
    return(this->parser.get_Next_Read());
}

void CReadInBitsSet::flushParser(void)
{
    this->parser.pBuf->fflush();
}
